ggplot2library(gapminder)
ggplot(data = gapminder,
mapping = aes(x = year,
y = lifeExp)) +
geom_line()ggplot(data = gapminder,
mapping = aes(x = year,
y = lifeExp,
group = country)) +
geom_line()ggplot(data = gapminder,
mapping = aes(x = lifeExp,
y = gdpPercap,
color = continent)) +
geom_point()ggplot(data = gapminder,
mapping = aes(x = lifeExp,
y = gdpPercap,
color = continent,
# redundant
group = continent)) +
geom_point()ggplot(data = gapminder,
mapping = aes(x = year,
y = lifeExp)) +
geom_line(aes(group = country), alpha = .1) +
geom_smooth()ggplot(data = gapminder,
mapping = aes(x = year,
y = lifeExp)) +
geom_line(aes(group = country), alpha = .1) +
geom_smooth(method = "lm")ggplot(data = gss_sm,
mapping = aes(x = bigregion)) +
geom_bar()ggplot(data = gss_sm,
mapping = aes(x = bigregion)) +
geom_bar(aes(y = ..prop..))ggplot(data = gss_sm,
mapping = aes(x = bigregion,
group = 1)) +
geom_bar(aes(y = ..prop..))ggplot(data = gss_sm,
mapping = aes(x = religion,
fill = religion)) +
geom_bar()ggplot(data = gss_sm,
mapping = aes(x = religion, fill = religion)) +
geom_bar() +
guides(fill = FALSE) ggplot(data = gss_sm,
mapping = aes(x = bigregion,
fill = religion)) +
geom_bar()ggplot(data = gss_sm,
mapping = aes(x = bigregion,
fill = religion)) +
geom_bar(position = "fill")ggplot(data = gss_sm,
mapping = aes(x = bigregion,
fill = religion)) +
geom_bar(position = "dodge")ggplot(data = gss_sm,
mapping = aes(x = bigregion,
fill = religion)) +
geom_bar(aes(y = ..prop..), position = "dodge")ggplot(data = gss_sm,
mapping = aes(x = bigregion,
fill = religion)) +
geom_bar(aes(y = ..prop..,
group = religion), position = "dodge")ggplot(data = gss_sm,
mapping = aes(x = religion)) +
geom_bar(aes(y = ..prop..,
group = bigregion), position = "dodge") +
facet_wrap(~ bigregion)glimpse(gss_sm)## Observations: 2,867
## Variables: 32
## $ year <dbl> 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 20...
## $ id <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,...
## $ ballot <dbl+lbl> 1, 2, 3, 1, 3, 2, 1, 3, 1, 3, 2, 1, 2, 3, 2, 3...
## $ age <dbl> 47, 61, 72, 43, 55, 53, 50, 23, 45, 71, 33, 86, 32...
## $ childs <dbl> 3, 0, 2, 4, 2, 2, 2, 3, 3, 4, 5, 4, 3, 5, 7, 2, 6,...
## $ sibs <dbl+lbl> 2, 3, 3, 3, 2, 2, 2, 6, 5, 1, 4, 4, 3, 6, 0, 1...
## $ degree <fct> Bachelor, High School, Bachelor, High School, Grad...
## $ race <fct> White, White, White, White, White, White, White, O...
## $ sex <fct> Male, Male, Male, Female, Female, Female, Male, Fe...
## $ region <fct> New England, New England, New England, New England...
## $ income16 <fct> $170000 or over, $50000 to 59999, $75000 to $89999...
## $ relig <fct> None, None, Catholic, Catholic, None, None, None, ...
## $ marital <fct> Married, Never Married, Married, Married, Married,...
## $ padeg <fct> Graduate, Lt High School, High School, NA, Bachelo...
## $ madeg <fct> High School, High School, Lt High School, High Sch...
## $ partyid <fct> Independent, Ind,near Dem, Not Str Republican, Not...
## $ polviews <fct> Moderate, Liberal, Conservative, Moderate, Slightl...
## $ happy <fct> Pretty Happy, Pretty Happy, Very Happy, Pretty Hap...
## $ partners <fct> NA, 1 Partner, 1 Partner, NA, 1 Partner, 1 Partner...
## $ grass <fct> NA, Legal, Not Legal, NA, Legal, Legal, NA, Not Le...
## $ zodiac <fct> Aquarius, Scorpio, Pisces, Cancer, Scorpio, Scorpi...
## $ pres12 <dbl+lbl> 3, 1, 2, 2, 1, 1, NA, NA, NA, 2, NA, NA, 1, 1,...
## $ wtssall <dbl> 0.957, 0.478, 0.957, 1.914, 1.435, 0.957, 1.435, 0...
## $ income_rc <fct> Gt $170000, Gt $50000, Gt $75000, Gt $170000, Gt $...
## $ agegrp <fct> Age 45-55, Age 55-65, Age 65+, Age 35-45, Age 45-5...
## $ ageq <fct> Age 34-49, Age 49-62, Age 62+, Age 34-49, Age 49-6...
## $ siblings <fct> 2, 3, 3, 3, 2, 2, 2, 6+, 5, 1, 4, 4, 3, 6+, 0, 1, ...
## $ kids <fct> 3, 0, 2, 4+, 2, 2, 2, 3, 3, 4+, 4+, 4+, 3, 4+, 4+,...
## $ religion <fct> None, None, Catholic, Catholic, None, None, None, ...
## $ bigregion <fct> Northeast, Northeast, Northeast, Northeast, Northe...
## $ partners_rc <fct> NA, 1, 1, NA, 1, 1, NA, 1, NA, 3, 1, NA, 1, NA, 0,...
## $ obama <dbl> 0, 1, 0, 0, 1, 1, NA, NA, NA, 0, NA, NA, 1, 1, 0, ...
(rel_by_region <- gss_sm %>%
group_by(bigregion, religion) %>%
summarize(N = n()) %>%
mutate(freq = N / sum(N),
pct = round((freq * 100), 0)))## # A tibble: 24 x 5
## # Groups: bigregion [4]
## bigregion religion N freq pct
## <fct> <fct> <int> <dbl> <dbl>
## 1 Northeast Protestant 158 0.324 32.
## 2 Northeast Catholic 162 0.332 33.
## 3 Northeast Jewish 27 0.0553 6.
## 4 Northeast None 112 0.230 23.
## 5 Northeast Other 28 0.0574 6.
## 6 Northeast <NA> 1 0.00205 0.
## 7 Midwest Protestant 325 0.468 47.
## 8 Midwest Catholic 172 0.247 25.
## 9 Midwest Jewish 3 0.00432 0.
## 10 Midwest None 157 0.226 23.
## # ... with 14 more rows
ggplot(data = rel_by_region,
mapping = aes(x = bigregion,
y = pct,
fill = religion)) +
geom_col(position = "dodge2") +
labs(x = "Region", y = "Percent", fill = "Religion") +
theme(legend.position = "top")ggplot(data = rel_by_region,
mapping = aes(x = religion,
y = pct,
fill = religion)) +
geom_col(position = "dodge2") +
labs(x = "Region", y = "Percent", fill = "Religion") +
guides(fill = FALSE) +
coord_flip() +
facet_grid(~ bigregion)glimpse(organdata)## Observations: 238
## Variables: 21
## $ country <chr> "Australia", "Australia", "Australia", "Austr...
## $ year <date> NA, 1991-01-01, 1992-01-01, 1993-01-01, 1994...
## $ donors <dbl> NA, 12.09, 12.35, 12.51, 10.25, 10.18, 10.59,...
## $ pop <int> 17065, 17284, 17495, 17667, 17855, 18072, 183...
## $ pop_dens <dbl> 0.220, 0.223, 0.226, 0.228, 0.231, 0.233, 0.2...
## $ gdp <int> 16774, 17171, 17914, 18883, 19849, 21079, 219...
## $ gdp_lag <int> 16591, 16774, 17171, 17914, 18883, 19849, 210...
## $ health <dbl> 1300, 1379, 1455, 1540, 1626, 1737, 1846, 194...
## $ health_lag <dbl> 1224, 1300, 1379, 1455, 1540, 1626, 1737, 184...
## $ pubhealth <dbl> 4.8, 5.4, 5.4, 5.4, 5.4, 5.5, 5.6, 5.7, 5.9, ...
## $ roads <dbl> 136.6, 122.3, 112.8, 110.5, 108.0, 111.6, 107...
## $ cerebvas <int> 682, 647, 630, 611, 631, 592, 576, 525, 516, ...
## $ assault <int> 21, 19, 17, 18, 17, 16, 17, 17, 16, 15, 16, 1...
## $ external <int> 444, 425, 406, 376, 387, 371, 395, 385, 410, ...
## $ txp_pop <dbl> 0.938, 0.926, 0.915, 0.906, 0.896, 0.885, 0.8...
## $ world <chr> "Liberal", "Liberal", "Liberal", "Liberal", "...
## $ opt <chr> "In", "In", "In", "In", "In", "In", "In", "In...
## $ consent_law <chr> "Informed", "Informed", "Informed", "Informed...
## $ consent_practice <chr> "Informed", "Informed", "Informed", "Informed...
## $ consistent <chr> "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Ye...
## $ ccode <chr> "Oz", "Oz", "Oz", "Oz", "Oz", "Oz", "Oz", "Oz...
ggplot(data = organdata,
mapping = aes(x = year, y = donors)) +
geom_point()ggplot(data = organdata,
mapping = aes(x = year, y = donors)) +
geom_line(aes(group = country)) +
facet_wrap(~ country)ggplot(data = organdata,
mapping = aes(x = country, y = donors)) +
geom_boxplot()coord_flip()ggplot(data = organdata,
mapping = aes(x = country, y = donors)) +
geom_boxplot() +
coord_flip()reorder()ggplot(data = organdata,
mapping = aes(x = reorder(country, donors, na.rm = TRUE),
y = donors)) +
geom_boxplot() +
labs(x = NULL) +
coord_flip()ggplot(data = organdata,
mapping = aes(x = reorder(country, donors, na.rm = TRUE),
y = donors, fill = world)) +
geom_boxplot() +
labs(x = NULL) +
coord_flip() +
theme(legend.position = "bottom")ggplot(data = organdata,
mapping = aes(x = reorder(country, donors, na.rm = TRUE),
y = donors, color = world)) +
geom_point() +
labs(x = NULL) +
coord_flip() +
theme(legend.position = "bottom")ggplot(data = organdata,
mapping = aes(x = reorder(country, donors, na.rm = TRUE),
y = donors, color = world)) +
geom_jitter() +
labs(x = NULL) +
coord_flip() +
theme(legend.position = "bottom")(by_country <- organdata %>%
group_by(consent_law, country) %>%
summarize(donors_mean = mean(donors, na.rm = TRUE),
donors_sd = sd(donors, na.rm = TRUE),
gdp_mean = mean(gdp, na.rm = TRUE),
health_mean = mean(health, na.rm = TRUE),
roads_mean = mean(roads, na.rm = TRUE),
cerebvas_mean = mean(cerebvas, na.rm = TRUE)))## # A tibble: 17 x 8
## # Groups: consent_law [?]
## consent_law country donors_mean donors_sd gdp_mean health_mean
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Informed Australia 10.6 1.14 22179. 1958.
## 2 Informed Canada 14.0 0.751 23711. 2272.
## 3 Informed Denmark 13.1 1.47 23722. 2054.
## 4 Informed Germany 13.0 0.611 22163. 2349.
## 5 Informed Ireland 19.8 2.48 20824. 1480.
## 6 Informed Netherlands 13.7 1.55 23013. 1993.
## 7 Informed United Kingdom 13.5 0.775 21359. 1561.
## 8 Informed United States 20.0 1.33 29212. 3988.
## 9 Presumed Austria 23.5 2.42 23876. 1875.
## 10 Presumed Belgium 21.9 1.94 22500. 1958.
## 11 Presumed Finland 18.4 1.53 21019. 1615.
## 12 Presumed France 16.8 1.60 22603. 2160.
## 13 Presumed Italy 11.1 4.28 21554. 1757.
## 14 Presumed Norway 15.4 1.11 26448. 2217.
## 15 Presumed Spain 28.1 4.96 16933. 1289.
## 16 Presumed Sweden 13.1 1.75 22415. 1951.
## 17 Presumed Switzerland 14.2 1.71 27233. 2776.
## # ... with 2 more variables: roads_mean <dbl>, cerebvas_mean <dbl>
(by_country <- organdata %>%
group_by(consent_law, country) %>%
summarize_if(is.numeric, funs(mean, sd), na.rm = TRUE) %>%
ungroup())## # A tibble: 17 x 28
## consent_law country donors_mean pop_mean pop_dens_mean gdp_mean
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Informed Australia 10.6 18318. 0.237 22179.
## 2 Informed Canada 14.0 29608. 0.297 23711.
## 3 Informed Denmark 13.1 5257. 12.2 23722.
## 4 Informed Germany 13.0 80255. 22.5 22163.
## 5 Informed Ireland 19.8 3674. 5.23 20824.
## 6 Informed Netherlands 13.7 15548. 37.4 23013.
## 7 Informed United Kingdom 13.5 58187. 24.0 21359.
## 8 Informed United States 20.0 269330. 2.80 29212.
## 9 Presumed Austria 23.5 7927. 9.45 23876.
## 10 Presumed Belgium 21.9 10153. 30.7 22500.
## 11 Presumed Finland 18.4 5112. 1.51 21019.
## 12 Presumed France 16.8 58056. 10.5 22603.
## 13 Presumed Italy 11.1 57360. 19.0 21554.
## 14 Presumed Norway 15.4 4386. 1.35 26448.
## 15 Presumed Spain 28.1 39666. 7.84 16933.
## 16 Presumed Sweden 13.1 8789. 1.95 22415.
## 17 Presumed Switzerland 14.2 7037. 17.0 27233.
## # ... with 22 more variables: gdp_lag_mean <dbl>, health_mean <dbl>,
## # health_lag_mean <dbl>, pubhealth_mean <dbl>, roads_mean <dbl>,
## # cerebvas_mean <dbl>, assault_mean <dbl>, external_mean <dbl>,
## # txp_pop_mean <dbl>, donors_sd <dbl>, pop_sd <dbl>, pop_dens_sd <dbl>,
## # gdp_sd <dbl>, gdp_lag_sd <dbl>, health_sd <dbl>, health_lag_sd <dbl>,
## # pubhealth_sd <dbl>, roads_sd <dbl>, cerebvas_sd <dbl>,
## # assault_sd <dbl>, external_sd <dbl>, txp_pop_sd <dbl>
ggplot(data = by_country,
mapping = aes(x = donors_mean,
y = reorder(country, donors_mean),
color = consent_law)) +
geom_point(size = 3) +
labs(x = "Donor Procurement Rate",
y = "", color = "Consent Law") +
theme(legend.position = "top")ggplot(data = by_country,
mapping = aes(x = donors_mean,
y = reorder(country, donors_mean))) +
geom_point(size = 3) +
facet_wrap(~ consent_law, ncol = 1) +
labs(x = "Donor Procurement Rate",
y = "", color = "Consent Law")ggplot(data = by_country,
mapping = aes(x = donors_mean,
y = reorder(country, donors_mean))) +
geom_point(size = 3) +
facet_wrap(~ consent_law, scales = "free_y", ncol = 1) +
labs(x = "Donor Procurement Rate",
y = "", color = "Consent Law")ggplot(data = by_country,
mapping = aes(x = reorder(country, donors_mean),
y = donors_mean)) +
geom_pointrange(mapping = aes(ymin = donors_mean - donors_sd,
ymax = donors_mean + donors_sd)) +
labs(x = "",
y = "Donor Procurement Rate") +
coord_flip()geom_text()ggplot(data = by_country,
mapping = aes(x = roads_mean,
y = donors_mean)) +
geom_point() +
geom_text(mapping = aes(label = country))geom_text()ggplot(data = by_country,
mapping = aes(x = roads_mean,
y = donors_mean)) +
geom_point() +
geom_text(mapping = aes(label = country), hjust = 0)ggrepel::geom_text_repel()elections_historic %>%
select(2:7)## # A tibble: 49 x 6
## year winner win_party ec_pct popular_pct popular_margin
## <int> <chr> <chr> <dbl> <dbl> <dbl>
## 1 1824 John Quincy Adams D.-R. 0.322 0.309 -0.104
## 2 1828 Andrew Jackson Dem. 0.682 0.559 0.122
## 3 1832 Andrew Jackson Dem. 0.766 0.547 0.178
## 4 1836 Martin Van Buren Dem. 0.578 0.508 0.142
## 5 1840 William Henry Harris… Whig 0.796 0.529 0.0605
## 6 1844 James Polk Dem. 0.618 0.495 0.0145
## 7 1848 Zachary Taylor Whig 0.562 0.473 0.0479
## 8 1852 Franklin Pierce Dem. 0.858 0.508 0.0695
## 9 1856 James Buchanan Dem. 0.588 0.453 0.122
## 10 1860 Abraham Lincoln Rep. 0.594 0.396 0.101
## # ... with 39 more rows
p_title <- "Presidential Elections: Popular & Electoral College Margins"
p_subtitle <- "1824-2016"
p_caption <- "Data for 2016 are provisional."
x_label <- "Winner's share of Popular Vote"
y_label <- "Winner's share of Electoral College Votes"
library(ggrepel)
ggplot(elections_historic, aes(x = popular_pct,
y = ec_pct,
label = winner_label)) +
geom_hline(yintercept = 0.5, size = 1.4, color = "gray80") +
geom_vline(xintercept = 0.5, size = 1.4, color = "gray80") +
geom_point() +
geom_text_repel() +
scale_x_continuous(labels = scales::percent) +
scale_y_continuous(labels = scales::percent) +
labs(x = x_label, y = y_label, title = p_title, subtitle = p_subtitle,
caption = p_caption)ggplot(data = by_country,
mapping = aes(x = gdp_mean, y = health_mean)) +
geom_point() +
geom_text_repel(data = filter(by_country, gdp_mean > 25000),
mapping = aes(label = country))ggplot(data = by_country,
mapping = aes(x = gdp_mean, y = health_mean)) +
geom_point() +
geom_text_repel(data = filter(by_country,
gdp_mean > 25000 | health_mean < 1500 |
country %in% "Belgium"),
mapping = aes(label = country))